library(here)
library(tidyverse)
library(stringr) # for parsing r string 
library(jsonlite) # for parsing r string 
library(ggiraphExtra)
library(plotrix)
# Read each processed CSV into its own tibble; here() resolves the relative
# path against the project root.
RT_data <- here('data/processed_data/trimmed_RTdata.csv') %>%
  read_csv()
pref_data <- here('data/processed_data/trimmed_prefdata.csv') %>%
  read_csv()
similarity_data <- here('data/processed_data/trimmed_similaritydata.csv') %>%
  read_csv()
complexity_data <- here('data/processed_data/trimmed_complexitydata.csv') %>%
  read_csv()
demog_data <- here('data/processed_data/trimmed_demogdata.csv') %>%
  read_csv()

Descriptive Info

N = 167

sample size

# Number of distinct `subject` values in the RT data, as a 1 x 1 tibble.
RT_data %>%
  summarise(n = n_distinct(subject))
## # A tibble: 1 x 1
##       n
##   <int>
## 1   161

demographic

age

# Distribution of `age` in the demographic data (default 30 bins).
ggplot(demog_data, aes(x = age)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ethnicity

# Row counts per ethnicity category. geom_bar() counts rows for each discrete
# x value; geom_histogram() is meant for binned continuous data and warns
# about ignored binning parameters when forced to stat = "count".
demog_data %>%
  ggplot(aes(x = ethnicity)) +
  geom_bar() +
  # Rotate the category labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

Gender

# Row counts per gender category. geom_bar() counts rows for each discrete
# x value; geom_histogram() is meant for binned continuous data and warns
# about ignored binning parameters when forced to stat = "count".
demog_data %>%
  ggplot(aes(x = gender)) +
  geom_bar() +
  # Rotate the category labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

education

# Row counts per education category. geom_bar() counts rows for each discrete
# x value; geom_histogram() is meant for binned continuous data and warns
# about ignored binning parameters when forced to stat = "count".
demog_data %>%
  ggplot(aes(x = education)) +
  geom_bar() +
  # Rotate the category labels so long names do not overlap.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

RT raw

overall

# Histogram of every `rt` value on a log10 x-axis (default 30 bins).
ggplot(RT_data, aes(x = rt)) +
  scale_x_log10() +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by trial type

# Log10-scaled `rt` histogram, one panel per trial type.
ggplot(RT_data, aes(x = rt)) +
  scale_x_log10() +
  geom_histogram() +
  facet_wrap(vars(trial_type))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by trial complexity

# Log10-scaled `rt` histogram, one panel per trial complexity level.
ggplot(RT_data, aes(x = rt)) +
  scale_x_log10() +
  geom_histogram() +
  facet_wrap(vars(trial_complexity))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by block type

# Log10-scaled `rt` histogram, one panel per block type.
ggplot(RT_data, aes(x = rt)) +
  scale_x_log10() +
  geom_histogram() +
  facet_wrap(vars(block_type))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

by item id

simple

# Log10-scaled `rt` histograms for the "simple" trials only, one panel per
# item id.
simple_trials <- filter(RT_data, trial_complexity == "simple")
ggplot(simple_trials, aes(x = rt)) +
  scale_x_log10() +
  geom_histogram() +
  facet_wrap(vars(item_id))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

complex

# Log10-scaled `rt` histograms for the "complex" trials only, one panel per
# item id.
complex_trials <- filter(RT_data, trial_complexity == "complex")
ggplot(complex_trials, aes(x = rt)) +
  scale_x_log10() +
  geom_histogram() +
  facet_wrap(vars(item_id))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

RT basic plotting

normal

# Summary statistics of raw `rt` for every combination of trial number,
# item type and trial complexity. SE is computed with std.error().
summarized <- RT_data %>%
  group_by(trial_number, item_type, trial_complexity) %>%
  summarise(
    meanRT = mean(rt),
    maxRT = max(rt),
    minRT = min(rt),
    medianRT = median(rt),
    Std = sd(rt),
    SE = std.error(rt),
    n = n()
  )
## `summarise()` regrouping output by 'trial_number', 'item_type' (override with `.groups` argument)
# Median RT across trial numbers: one coloured line per item type, one panel
# per trial complexity. Error bars span medianRT +/- SE, where SE is the
# std.error() column of `summarized`.
ggplot(summarized, aes(x = trial_number, y = medianRT, colour = item_type)) +
  geom_errorbar(
    aes(ymin = medianRT - SE, ymax = medianRT + SE),
    width = 0.8, size = 0.8, position = position_dodge(width = 0.2),
    show.legend = FALSE, alpha = 0.8
  ) +
  geom_point(position = position_dodge(width = 0.2), size = 2.5) +
  # Only the dodged line is drawn so that it passes through the dodged points
  # and error bars; an additional un-dodged geom_line() would be offset.
  geom_line(size = 1.2, position = position_dodge(width = 0.2)) +
  ylab("RT [msec]") +
  theme_gray() +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 22, face = "bold"),
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 15),
    legend.title = element_blank(),
    legend.text = element_text(size = 14, face = "bold"),
    strip.text = element_text(size = 17, face = "bold"),
    legend.key.size = unit(2.5, "lines")
  ) +
  scale_x_continuous(name = "Trial Number", breaks = c(2, 4, 6, 8)) +
  facet_grid(~trial_complexity)

log transformed

# Same per-cell summary as for raw RT, but on the natural log of `rt`.
# The log is taken once up front; the helper column is dropped by summarise().
summarized_log <- RT_data %>%
  mutate(log_rt = log(rt)) %>%
  group_by(trial_number, item_type, trial_complexity) %>%
  summarise(
    meanRT = mean(log_rt),
    maxRT = max(log_rt),
    minRT = min(log_rt),
    medianRT = median(log_rt),
    Std = sd(log_rt),
    SE = std.error(log_rt),
    n = n()
  )
## `summarise()` regrouping output by 'trial_number', 'item_type' (override with `.groups` argument)
# Median log RT across trial numbers, with one panel per item type (rows) and
# trial complexity (columns). Error bars span medianRT +/- SE, where SE is the
# std.error() column of `summarized_log`.
ggplot(
  summarized_log,
  aes(x = trial_number, y = medianRT, colour = item_type)
) +
  geom_errorbar(
    aes(ymin = medianRT - SE, ymax = medianRT + SE),
    width = 0.8, size = 0.8, position = position_dodge(width = 0.2),
    show.legend = FALSE, alpha = 0.8
  ) +
  geom_point(position = position_dodge(width = 0.2), size = 2.5) +
  # One line layer is enough: each panel holds a single item type, so a second
  # thinner geom_line() would be hidden underneath this one.
  geom_line(size = 1.2, position = position_dodge(width = 0.2)) +
  ylab("log(RT) [msec]") +
  theme_gray() +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 15),
    legend.title = element_blank(),
    legend.text = element_text(size = 14, face = "bold"),
    strip.text = element_text(size = 8, face = "bold"),
    legend.key.size = unit(2.5, "lines")
  ) +
  scale_x_continuous(name = "Trial Number", breaks = c(2, 4, 6, 8)) +
  facet_grid(item_type ~ trial_complexity)

Preference test

raw

overall

# Number of preference trials on which each stimulus type was selected.
# geom_bar() counts rows per discrete x value; geom_histogram() forced to
# stat = "count" only adds a warning about ignored binning parameters.
pref_data %>%
  ggplot(aes(x = stim_type_selected)) +
  geom_bar()

by block type

# Number of preference trials per selected stimulus type, one panel per
# block type. geom_bar() counts rows per discrete x value; geom_histogram()
# forced to stat = "count" only adds a warning about ignored parameters.
pref_data %>%
  ggplot(aes(x = stim_type_selected)) +
  geom_bar() +
  facet_wrap(~block_type)

by block number

# Number of preference trials per selected stimulus type, one panel per
# block number. geom_bar() counts rows per discrete x value; geom_histogram()
# forced to stat = "count" only adds a warning about ignored parameters.
pref_data %>%
  ggplot(aes(x = stim_type_selected)) +
  geom_bar() +
  facet_wrap(~block_number) +
  # Rotate the category labels so they fit inside the small panels.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

check participant pattern

# Per-participant share of preference trials on which "novel" was selected.
# The count is taken inside summarise() so that participants who never chose
# "novel" appear with a share of 0; counting per category and then filtering
# for "novel" would drop them from the histogram entirely.
pref_data %>%
  group_by(subject) %>%
  summarise(
    # 20 is the fixed denominator used by this analysis -- presumably the
    # number of preference trials per participant; TODO confirm.
    percent_novel = sum(stim_type_selected == "novel", na.rm = TRUE) / 20,
    .groups = "drop"
  ) %>%
  ggplot(aes(x = percent_novel)) +
  geom_histogram(bins = 20)

relationship with RT

overall

# Log looking time by selected stimulus type: jittered raw trials plus the
# summary computed by "mean_cl_boot".
ggplot(pref_data, aes(x = stim_type_selected, y = log(trial_looking_time))) +
  geom_jitter(width = 0.3, alpha = 0.05) +
  stat_summary(fun.data = "mean_cl_boot")

by block type

# Log looking time by selected stimulus type, one panel per block type:
# jittered raw trials plus the summary computed by "mean_cl_boot".
ggplot(pref_data, aes(x = stim_type_selected, y = log(trial_looking_time))) +
  geom_jitter(width = 0.3, alpha = 0.05) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(vars(block_type))

RT and block num

how long participants spent on the pref test

by block type

# Median log looking time on preference trials across block numbers, with one
# panel (and colour) per block type. Error bars span medianRT +/- SE, where SE
# comes from std.error() on the log looking times.
pref_data %>%
  group_by(block_number, block_type) %>%
  summarise(
    meanRT = mean(log(trial_looking_time)),
    maxRT = max(log(trial_looking_time)),
    minRT = min(log(trial_looking_time)),
    medianRT = median(log(trial_looking_time)),
    Std = sd(log(trial_looking_time)),
    SE = std.error(log(trial_looking_time)),
    n = n()
  ) %>%
  ggplot(aes(x = block_number, y = medianRT, colour = block_type)) +
  geom_errorbar(
    aes(ymin = medianRT - SE, ymax = medianRT + SE),
    width = 0.8, size = 0.8, position = position_dodge(width = 0.2),
    show.legend = FALSE, alpha = 0.8
  ) +
  geom_point(position = position_dodge(width = 0.2), size = 2.5) +
  # One line layer is enough: each panel holds a single block type, so a
  # second thinner geom_line() would be hidden underneath this one.
  geom_line(size = 1.2, position = position_dodge(width = 0.2)) +
  ylab("log(RT) [msec]") +
  theme_gray() +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 15),
    legend.title = element_blank(),
    legend.text = element_text(size = 14, face = "bold"),
    strip.text = element_text(size = 8, face = "bold"),
    legend.key.size = unit(2.5, "lines")
  ) +
  scale_x_continuous(name = "Block Number") +
  facet_wrap(~block_type)
## `summarise()` regrouping output by 'block_number' (override with `.groups` argument)

by choice

# Median log looking time on preference trials across block numbers, with one
# panel (and colour) per selected stimulus type. Error bars span
# medianRT +/- SE, where SE comes from std.error() on the log looking times.
pref_data %>%
  group_by(block_number, stim_type_selected) %>%
  summarise(
    meanRT = mean(log(trial_looking_time)),
    maxRT = max(log(trial_looking_time)),
    minRT = min(log(trial_looking_time)),
    medianRT = median(log(trial_looking_time)),
    Std = sd(log(trial_looking_time)),
    SE = std.error(log(trial_looking_time)),
    n = n()
  ) %>%
  ggplot(aes(x = block_number, y = medianRT, colour = stim_type_selected)) +
  geom_errorbar(
    aes(ymin = medianRT - SE, ymax = medianRT + SE),
    width = 0.8, size = 0.8, position = position_dodge(width = 0.2),
    show.legend = FALSE, alpha = 0.8
  ) +
  geom_point(position = position_dodge(width = 0.2), size = 2.5) +
  # One line layer is enough: each panel holds a single selected stimulus
  # type, so a second thinner geom_line() would be hidden underneath this one.
  geom_line(size = 1.2, position = position_dodge(width = 0.2)) +
  ylab("log(RT) [msec]") +
  theme_gray() +
  theme(
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 15, face = "bold"),
    axis.title.x = element_text(size = 18, face = "bold"),
    axis.title.y = element_text(size = 18, face = "bold"),
    axis.text = element_text(size = 15),
    legend.title = element_blank(),
    legend.text = element_text(size = 14, face = "bold"),
    strip.text = element_text(size = 8, face = "bold"),
    legend.key.size = unit(2.5, "lines")
  ) +
  scale_x_continuous(name = "Block Number") +
  facet_wrap(~stim_type_selected)
## `summarise()` regrouping output by 'block_number' (override with `.groups` argument)

Similarity ratings

Raw

# Distribution of raw similarity ratings (30 bins).
ggplot(similarity_data, aes(x = rating)) +
  geom_histogram(bins = 30)

by intuitive similarity

# Derive pair-level labels from the two stimulus names of each rating row.
similarity_data <- similarity_data %>%
  mutate(
    # "complex" if either stimulus name contains "complex"; otherwise "simple"
    # if either name contains "simple"; NA when neither matches. Both the left
    # and the right stimulus are checked for each label.
    complexity = case_when(
      grepl("complex", stimulus_left) | grepl("complex", stimulus_right) ~ "complex",
      grepl("simple", stimulus_left) | grepl("simple", stimulus_right) ~ "simple"
    ),
    # First run of digits found in each stimulus name, as a number.
    stimulus_left_number = as.numeric(str_extract(stimulus_left, "[[:digit:]]+")),
    stimulus_right_number = as.numeric(str_extract(stimulus_right, "[[:digit:]]+")),
    # Pairs whose extracted numbers match are "similar"; every other pair,
    # including one with a missing number, falls through to "dissimilar".
    similarity = case_when(
      stimulus_left_number == stimulus_right_number ~ "similar",
      TRUE ~ "dissimilar"
    )
  )

# Ratings for "similar" vs "dissimilar" pairs, one panel per complexity label:
# jittered raw ratings plus the summary computed by "mean_cl_boot".
ggplot(similarity_data, aes(x = similarity, y = rating)) +
  geom_jitter(width = 0.3, alpha = 0.1) +
  stat_summary(fun.data = "mean_cl_boot") +
  facet_wrap(vars(complexity))

Complexity Ratings

Raw

# Distribution of raw complexity ratings (30 bins).
ggplot(complexity_data, aes(x = rating)) +
  geom_histogram(bins = 30)

by intuitive complexity

# Label each rated stimulus by the keyword found in its name: "complex" is
# checked first, then "simple"; names matching neither get NA.
complexity_data <- complexity_data %>%
  mutate(
    complexity = case_when(
      str_detect(stimulus, "complex") ~ "complex",
      str_detect(stimulus, "simple") ~ "simple"
    )
  )

# Ratings by name-derived complexity label: jittered raw ratings plus the
# summary computed by "mean_cl_boot".
ggplot(complexity_data, aes(x = complexity, y = rating)) +
  geom_jitter(width = 0.3, alpha = 0.1) +
  stat_summary(fun.data = "mean_cl_boot")